##Load in required packages
require (plyr)
require(dplyr)
require(ggplot2)
require(openxlsx)
require(xtable)
require(stargazer)
require(psych)
##Call in the data
Data <- read.csv("Replication_package/Data.csv")
#get the standardized variables for the index
Data$pwd_std<-(Data$pwd - mean(Data$pwd))/ sd(Data$pwd)
Data$age_std<-(Data$age - mean(Data$age, na.rm = T))/ sd(Data$age, na.rm = T)
Data$co2_std<-(Data$sum_co2 - mean(Data$sum_co2, na.rm = T))/ sd(Data$sum_co2, na.rm = T)
#note that the lowest 2 observations are zero and -ve and they should be NA 
Data$age_std<-ifelse(Data$age==0 ,NA,Data$age_std) 
##note that age zero means data is NA, so they are changed them so as not to miscalculate the index
#************************************************************************
##create index (equal weight to all variables)#still missing correct annual_co2 )
Data$index = (1/3 * Data$age_std)+ (1/3 * Data$pwd_std)+ (1/3 * Data$co2_std)
#******************************************************************
#Get summary stats for variables
#For age, drop 0s as they are missing and not 0 years old
Data$age<-ifelse(Data$age==0 ,NA,Data$age) 
sd(Data$age, na.rm = TRUE)
summary(Data$age, na.rm= TRUE)
#PWD
summary(Data$pwd)
sd(Data$pwd)
#Annual CO2 
sd(Data$sum_co2, na.rm = TRUE) 
summary(Data$sum_co2)

#***********************************************
#ANALYSIS
#GET top 20 plant (top 1%) based on index
top20<-top_n(Data, 20, index)
top20<- top20[,-c(3,7,12,15,16)]
#Get some summary stats on top 20 critical plant
summary(top20$age)
sd(top20$age, na.rm= T)
summary(top20$sum_co2)
sd(top20$sum_co2, na.rm= T)
summary(top20$sum_capacity)
sd(top20$sum_capacity, na.rm= T)
#print(xtable(top20, type = "latex"), file = "top20plants.tex")

##Get capacity per country 
countries<- ddply(Data,.(country ),summarize,
                  index=max(index, na.rm = T), sum_co2= sum(sum_co2), age= mean(age, na.rm = T), gpwv4popula=sum(gpwv4popula, na.rm = T), sum_capacity=sum(sum_capacity))
countries_top20plant<- ddply(top20,.(country ),summarize,
                             Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
#print(xtable(countries_top20plant, type = "latex"), file = "countries_top20plants.tex")
#*******************************************************
#Other weights-SA
sens_anlys<-  Data
#Main index
#sens_anlys$index = (1/3 * Data$age_std)+ (1/3 * Data$pwd_std)+ (1/3 * Data$co2_std)
#Increase weight of PWD to 50% and split the weights between age and CO2
sens_anlys$indpwd_50 = (1/4 * Data$age_std)+ (1/2 * Data$pwd_std)+ (1/4 * Data$co2_std)
#Increase weight of both PWD  and CO2 to 50% each and remove age
sens_anlys$indage_0 = (1/2 * Data$pwd_std)+ (1/2 * Data$co2_std)
#Increase weight of CO2 to 50% and split the weights between age and PWD
sens_anlys$indco2_50 = (1/4 * Data$age_std)+ (1/4 * Data$pwd_std)+ (1/2 * Data$co2_std)
#Increase weight of Age to 50% and split the weights between CO2 and PWD
sens_anlys$indage_50 = (1/2 * Data$age_std)+ (1/4 * Data$pwd_std)+ (1/4 * Data$co2_std)
#Increase weight of PWD to 75% and split the weights between CO2 and age
sens_anlys$indpwd_75 = (1/8 * Data$age_std)+ (3/4 * Data$pwd_std)+ (1/8 * Data$co2_std)
#Increase weight of CO2 to 75% and split the weights between PWD and age
sens_anlys$indco2_75 = (1/8 * Data$age_std)+ (1/8 * Data$pwd_std)+ (3/4 * Data$co2_std)
#Increase weight of Age to 75% and split the weights between CO2 and age
sens_anlys$indage_75 = (3/4 * Data$age_std)+ (1/8 * Data$pwd_std)+ (1/8 * Data$co2_std)

#--------------------------------
#top 20 of each index
##Table 5 in the paper is a sum up of the min and max values required per country based on the following:
#--------------------------------
#PWD 50%
top20_pwd50<-top_n(sens_anlys, 20, indpwd_50)
top20_pwd50<- top20_pwd50[,-c(10:17)]
print(xtable(top20_pwd50, type = "latex"), file = "top20_pwd50.tex")
#get summary per country 
countries_top20_pwd50<- ddply(top20_pwd50,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_pwd50, type = "latex"), file = "countries_top20_pwd50.tex")

#AGE 0%
top20_age0<-top_n(sens_anlys, 20, indage_0)
top20_age0<- top20_age0[,-c(10:17)]
print(xtable(top20_age0, type = "latex"), file = "top20_age0.tex")
## summary per country 
countries_top20_age0<- ddply(top20_age0,.(country ),summarize,
                             Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_age0, type = "latex"), file = "countries_top20_age0.tex")

#CO2 50%
top20_co250<-top_n(sens_anlys, 20, indco2_50)
top20_co250<- top20_co250[,-c(10:17)]
print(xtable(top20_co250, type = "latex"), file = "top20_co250.tex")
#get summary per country
countries_top20_co250<- ddply(top20_age0,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_co250, type = "latex"), file = "countries_top20_co250.tex")

#Age 50%
top20_age50<-top_n(sens_anlys, 20, indage_50)
top20_age50<- top20_age50[,-c(10:17)]
print(xtable(top20_age50, type = "latex"), file = "top20_age50.tex")
#get summary per country 
countries_top20_age50<- ddply(top20_age50,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_age50, type = "latex"), file = "countries_top20_age50.tex")

#PWD 75%
top20_pwd75<-top_n(sens_anlys, 20, indpwd_75)
top20_pwd75<- top20_pwd75[,-c(10:17)]
print(xtable(top20_pwd75, type = "latex"), file = "top20_pwd75.tex")
#get summary per country 
countries_top20_pwd75<- ddply(top20_pwd75,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_pwd75, type = "latex"), file = "countries_top20_pwd75.tex")

#CO2 75%
top20_co275<-top_n(sens_anlys, 20, indco2_75)
top20_co275<- top20_co275[,-c(10:17)]
print(xtable(top20_co275, type = "latex"), file = "top20_co275.tex")
#get summary per country 
countries_top20_co275<- ddply(top20_co275,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_co275, type = "latex"), file = "countries_top20_co275.tex")

#Age 75%
top20_age75<-top_n(sens_anlys, 20, indage_75)
top20_age75<- top20_age75[,-c(10:17)]
print(xtable(top20_age75, type = "latex"), file = "top20_age75.tex")
#get summary per country 
countries_top20_age75<- ddply(top20_age75,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top20_age75, type = "latex"), file = "countries_top20_age75.tex")
#***************************************************************************************
#Top 50, 100 and 10% of plants
top50<-top_n(Data, 50, index)
countries_top50plant<- ddply(top50,.(country ),summarize,
                             Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top50plant, type = "latex"), file = "countries_top50.tex")

top100<-top_n(Data, 100, index)
countries_top100plant<- ddply(top100,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top100plant, type = "latex"), file = "countries_top100.tex")

#Top 10% are approximately 215 plants
top215<-top_n(Data, 215, index)

countries_top215plant<- ddply(top215,.(country ),summarize,
                              Capacity=sum(sum_capacity), CO2= sum(sum_co2), Population_weighted_damage =sum(pwd, na.rm = T),Age= mean(age, na.rm = T), Population=sum(gpwv4popula, na.rm = T))
print(xtable(countries_top215plant, type = "latex"), file = "countries_top215.tex")
